#### "Inclusionary regimes, party institutionalization and redistribution under authoritarianism" ####
# authors: "Lars Pelke"
# date: 2020-06-16
# written under "R version 3.6.1 (2019-07-05)"

#### Preliminaries ####

# Show the version string of the running R session so it can be compared with
# the version this script was written under (see header).
R.version$version.string

# Start from a clean workspace: remove every object returned by ls().
rm(list = ls())

#libraries
library(tidyverse)
library(ggpubr)
library(readstata13)
library(texreg)
library(dotwhisker)
library(broom.mixed)
library(countrycode)
library(imputeTS)
library(readstata13)

# set working directory

# Please set the working directory to the folder where you unpacked the zip file.


#### Load Data ####

## V-DEM
# Read the V-Dem country-year file and attach a Correlates of War numeric
# country code (cown) derived from the country name. Two names get their code
# assigned by hand after the automatic conversion.
vdem <- read_csv("data/vdem_9/V-Dem-CY-Full+Others-v9.csv") %>%
  mutate(
    cown = countrycode(country_name, "country.name", "cown", warn = TRUE),
    cown = replace(cown, country_name == "Hong Kong", 715),
    cown = replace(cown, country_name == "Republic of Vietnam", 817),
    cown = as.integer(cown)
  )

## Solt Inequality and Redistribution Data

# load() restores the objects stored in the .rda file into the workspace and
# returns their names; `swiid` therefore holds a character vector of names.
# NOTE(review): `swiid_summary`, used below, is presumably one of the objects
# restored by this call - confirm against the .rda contents.
swiid <- load("data/Solt 2019 SWIID 8.2/swiid8_2.rda")
swiid8.0 <- read_csv("data/Solt 2019 SWIID 8.0//swiid.csv")

# Build a country-year key in both SWIID versions and list the keys that are
# present in version 8.2 but absent from version 8.0.
swiid_summary$country_year <- paste0(swiid_summary$country, swiid_summary$year)
swiid8.0$country_year <- paste0(swiid8.0$country, swiid8.0$year)

setdiff(swiid_summary$country_year, swiid8.0$country_year)

# Attach COW numeric codes; the names listed below get a hand-assigned code
# after the automatic conversion.
# NOTE(review): the overrides match on the exact country string; verify that
# "Turks and Caicos Island" is spelled exactly as in the SWIID data, otherwise
# that override never applies.
swiid_summary <- swiid_summary %>%
  mutate(
    cown = countrycode(country, "country.name", "cown", warn = TRUE),
    cown = replace(cown, country == "Hong Kong", 715),
    cown = replace(cown, country == "Puerto Rico", 1014),
    cown = replace(cown, country == "Anguilla", 1022),
    cown = replace(cown, country == "Micronesia", 987),
    cown = replace(cown, country == "Palestinian Territories", 1020),
    cown = replace(cown, country == "Serbia", 345),
    cown = replace(cown, country == "Turks and Caicos Island", 1018),
    cown = as.integer(cown)
  )

## Geddes Regime Type Data ##

# Read the GWF regime file and derive the COW numeric code from the country
# name column.
gwf <- read.dta13("data/gwf/gwf_AllPoliticalRegimes.dta") %>%
  mutate(cown = countrycode(gwf_country, "country.name", "cown", warn = TRUE))

## Correlates of War National Material Capabilities Dataset ##

cow <- read.dta13("data/cow/NMC_5_0.dta")

# Inspect urban and total population before transforming them.
summary(cow$upop)
summary(cow$tpop)

# Urban share = urban population / total population. Negative urban
# population values are treated as missing before the division.
# NOTE(review): tpop is not screened for negative values or zeros here -
# confirm from the summary above that none occur.
cow <- cow %>%
  rename(cown = ccode) %>%
  mutate(urban_percent = replace(upop, upop < 0, NA) / tpop) %>%
  select(cown, year, urban_percent)

summary(cow$urban_percent)

#### Additional Data for Hong Kong ####
# Read the World Bank urban-population file for Hong Kong (semicolon-separated,
# ".." marks missing values), reduce it to the same three columns as `cow`,
# and append it under the code 715.
world_bank_hong_kong <- read.csv2("data/world_bank_data/hong_kong_urban2.csv", na.strings = "..")

world_bank_hong_kong <- world_bank_hong_kong %>%
  transmute(
    year = Time,
    # the source value is a percentage; convert it to a share
    urban_percent = as.numeric(as.character(Urban.population....of.total.population.)) / 100,
    cown = 715
  )

cow <- rbind(cow, world_bank_hong_kong)

# Recode 816 to 817.
# NOTE(review): if `cow` already contains rows coded 817, this creates
# duplicate (cown, year) keys for the later join - confirm this is intended.
cow$cown <- replace(cow$cown, cow$cown == 816, 817)


## World Bank Data on Manufacturing Sector ##

# Manufacturing, value added (% of GDP)

worldbank_data <- read.csv("data/world_bank_data/730db413-06a3-4f58-810a-f9341fd46150_Data.csv", na.strings = "..")

# The "Time" column was previously referenced through a mangled name (garbled
# bytes in front of "..Time"). That name depends on how the file's leading
# bytes are decoded on a given machine and is not a portable identifier.
# Naming the column by position avoids the dependency.
# Assumes "Time" is the first column of the export (the garbled prefix looks
# like a byte-order-mark artifact, which only affects the first header) -
# TODO confirm.
names(worldbank_data)[1] <- "year"

worldbank_data <- worldbank_data %>%
  rename(country_name = Country.Name,
         manufacturing_percent = Manufacturing..value.added....of.GDP...NV.IND.MANF.ZS.) %>%
  select(year, country_name, manufacturing_percent)

# Keep only the first 15840 rows.
# NOTE(review): presumably this drops non-data footer rows of the World Bank
# export - re-check the count if the file is downloaded again.
worldbank_data <- worldbank_data[1:15840,]

# year may have been read as factor/character; convert it to numeric.
worldbank_data$year <- as.numeric(as.character(worldbank_data$year))

# Attach COW numeric codes; Hong Kong gets its code assigned by hand.
worldbank_data$cown <- countrycode(worldbank_data$country_name, "country.name", "cown", warn = TRUE)

worldbank_data$cown[worldbank_data$country_name == "Hong Kong SAR, China"] <- 715

# Rows without a COW code cannot be merged; drop them and the name column.
worldbank_data <- worldbank_data %>%
  drop_na(cown) %>%
  select(-country_name)

#### Data Manipulation ####

## Recompute absolute and relative redistribution ##

# Summaries before the recomputation, for comparison.
summary(swiid_summary$rel_red)
summary(swiid_summary$abs_red)

# abs_red: market Gini minus disposable-income Gini.
# rel_red: abs_red as a percentage of the market Gini.
swiid_summary <- swiid_summary %>%
  mutate(abs_red = gini_mkt - gini_disp,
         rel_red = (abs_red / gini_mkt) * 100)

# Summaries after the recomputation.
summary(swiid_summary$rel_red)
summary(swiid_summary$abs_red)

#### Merge Data ####

# Attach SWIID, GWF, COW urban share and World Bank manufacturing data to the
# V-Dem country-years, always keeping every V-Dem row and matching on COW code
# and year.
# NOTE(review): a left join repeats V-Dem rows if a right-hand table has
# several rows per (cown, year) - verify key uniqueness in each source.
vdem <- vdem %>%
  left_join(swiid_summary, by = c("cown", "year")) %>%
  left_join(gwf, by = c("cown", "year")) %>%
  left_join(cow, by = c("cown", "year")) %>%
  left_join(worldbank_data, by = c("cown", "year"))

summary(vdem$cown)

## Interpolate Missing Values of Manufacturing Percent GDP ##

# Build, per country, a spline-interpolated version of manufacturing_percent.
# - Only countries with at least two observed values are kept:
#   na_interpolation() cannot interpolate a series with fewer than two non-NA
#   points and stops with an error in that case. Countries with fewer
#   observations simply get NA in manufacturing_percent_ipol after the join.
# - Rows are sorted by year within country because the interpolation depends
#   on the order of the values.
# - The helper table is ungrouped before it is joined back.
vdem_man <- vdem %>%
  group_by(country_id) %>%
  filter(sum(!is.na(manufacturing_percent)) >= 2) %>%
  select(country_id, year, manufacturing_percent) %>%
  arrange(country_id, year) %>%
  mutate(manufacturing_percent_ipol = na_interpolation(manufacturing_percent, option = "spline")) %>%
  select(-manufacturing_percent) %>%
  ungroup()

# Add the interpolated series to the main data set.
vdem <- vdem %>%
  left_join(vdem_man, c("country_id", "year"))

#### Building Variables political inclusiveness and economic inclusiveness ####

# pol_incl: mean of v2pepwrses and v2pepwrsoc.
# eco_incl: copy of v2dlencmps.
# incl:     mean of pol_incl and eco_incl.
# For all three, smaller values indicate more exclusionary regimes and higher
# values indicate more inclusionary regimes.
vdem <- vdem %>%
  mutate(pol_incl = (v2pepwrses + v2pepwrsoc) / 2,
         eco_incl = v2dlencmps,
         incl = (pol_incl + eco_incl) / 2)

summary(vdem$pol_incl)
summary(vdem$eco_incl)
summary(vdem$incl)

#### Generate variables ####

# Logged population and oil income. Note that, despite the "_ln" suffix, both
# variables use the base-10 logarithm. Oil income values of exactly 0 are set
# to 1 first so that their log is 0 instead of -Inf.
vdem <- vdem %>%
  mutate(
    e_wb_pop_ln = log10(e_wb_pop),
    e_total_oil_income_pc = replace(e_total_oil_income_pc, e_total_oil_income_pc == 0, 1),
    e_total_oil_income_pc_ln = log10(e_total_oil_income_pc)
  )

## Percent Urban Population ##

# Within each country, fill missing urban_percent values with the next or
# previous observed value (author's note: affects 157 NA in 2013-2018).
# The interpolated manufacturing percentage is rescaled to a share.
# vdem stays grouped by country_id after this step.
vdem <- vdem %>%
  group_by(country_id) %>%
  fill(urban_percent, .direction = "updown") %>%
  mutate(manufacturing_percent_ipol = manufacturing_percent_ipol / 100)

#### Generate Competitive and Hegemonic Multiparty regimes ####

## Prepare election-specific variables ##

summary(vdem$v2elmulpar_ord)
summary(vdem$v2elfrfair_ord)

# Within each country, carry the last observed value of both election
# variables forward. Values that are still missing afterwards are set to 0
# (assumption: NA means no elections in place).
vdem <- vdem %>%
  group_by(country_id) %>%
  fill(v2elmulpar_ord, v2elfrfair_ord) %>%
  mutate(v2elfrfair_ord = coalesce(v2elfrfair_ord, 0),
         v2elmulpar_ord = coalesce(v2elmulpar_ord, 0))


summary(vdem$v2elmulpar_ord)
summary(vdem$v2elfrfair_ord)
summary(vdem$v2elsuffrage)
summary(vdem$v2elsuffrage)


# Classify each country-year:
#   0 = closed autocracy
#   1 = hegemonic multiparty regime
#   2 = competitive multiparty regime
# The three conditions are mutually exclusive; rows matching none of them
# (e.g. because v2elsuffrage is missing) get NA.
vdem <- vdem %>%
  mutate(auto_regime_type = case_when(
    v2elmulpar_ord < 2 ~ 0,
    v2elmulpar_ord >= 2 & (v2elfrfair_ord < 2 | v2elsuffrage <= 25) ~ 1,
    v2elmulpar_ord >= 2 & v2elfrfair_ord >= 2 & v2elsuffrage > 25 ~ 2
  ))

table(vdem$auto_regime_type)
summary(vdem$auto_regime_type)

#### Export V-Dem-SWIID DATASET ####

# Write the merged V-Dem/SWIID data set to disk as an RDS file.
saveRDS(object = vdem, file = "data/vdem_merged.rds")

#### BUILD V-Dem-SPaW DATASET #### 

# Read the SPaW replication file and keep the COW code, the year and every
# column whose name starts with "univers".
spaw <- read.dta13("data/SPAW Rasmussen 2016/CPS_replication_Final.dta") %>%
  mutate(cown = Ccodecow) %>%
  select(cown, year, starts_with("univers"))

# Attach the SPaW columns to the V-Dem country-years, then set
# univers_oldageprog values of 0 and 1 to missing.
vdem <- vdem %>%
  left_join(spaw, by = c("cown", "year")) %>%
  mutate(univers_oldageprog = ifelse(univers_oldageprog == 0 | univers_oldageprog == 1,
                                     NA,
                                     univers_oldageprog))

summary(vdem$univers_oldageprog)

# Write the V-Dem/SPaW data set to disk as an RDS file.
saveRDS(object = vdem, file = "data/vdem_merged_spaw.rds")
